home *** CD-ROM | disk | FTP | other *** search
/ Personal Computer World 2009 February / PCWFEB09.iso / Software / Linux / Kubuntu 8.10 / kubuntu-8.10-desktop-i386.iso / casper / filesystem.squashfs / usr / lib / python2.5 / email / feedparser.pyc (.txt) < prev    next >
Encoding:
Python Compiled Bytecode  |  2008-10-29  |  10.9 KB  |  450 lines

  1. # Source Generated with Decompyle++
  2. # File: in.pyc (Python 2.5)
  3.  
  4. """FeedParser - An email feed parser.
  5.  
  6. The feed parser implements an interface for incrementally parsing an email
  7. message, line by line.  This has advantages for certain applications, such as
  8. those reading email messages off a socket.
  9.  
  10. FeedParser.feed() is the primary interface for pushing new data into the
  11. parser.  It returns when there's nothing more it can do with the available
  12. data.  When you have no more data to push into the parser, call .close().
  13. This completes the parsing and returns the root message object.
  14.  
  15. The other advantage of this parser is that it will never throw a parsing
  16. exception.  Instead, when it finds something unexpected, it adds a 'defect' to
  17. the current message.  Defects are just instances that live on the message
  18. object's .defects attribute.
  19. """
  20. __all__ = [
  21.     'FeedParser']
  22. import re
  23. from email import errors
  24. from email import message
# Matches a single line terminator (CRLF, CR or LF).  Used with .match(), so
# it detects a line that *begins* with a terminator, i.e. a blank line.
NLCRE = re.compile('\r\n|\r|\n')
# Same terminators inside a capturing group; used with .match() to strip a
# leading line terminator from the first epilogue line.
NLCRE_bol = re.compile('(\r\n|\r|\n)')
# Terminator anchored at the end of the string; used with .search() to strip
# one trailing line terminator.
NLCRE_eol = re.compile('(\r\n|\r|\n)$')
# Capturing form handed to .split() so the terminators are kept in the
# resulting list and can be re-attached to their lines.
NLCRE_crack = re.compile('(\r\n|\r|\n)')
# Recognises a header-section line: an envelope "From " line, a field name
# (one or more characters in the octal ranges 041-071 and 073-176, i.e.
# excluding the colon, octal 072) followed by ':', or a continuation line
# starting with a tab or space.
headerRE = re.compile('^(From |[\\041-\\071\\073-\\176]{1,}:|[\\t ])')
EMPTYSTRING = ''
NL = '\n'
# Unique sentinel returned/yielded when no complete line is available yet
# and the input has not been closed.
NeedMoreData = object()
  33.  
  34. class BufferedSubFile(object):
  35.     '''A file-ish object that can have new data loaded into it.
  36.  
  37.     You can also push and pop line-matching predicates onto a stack.  When the
  38.     current predicate matches the current line, a false EOF response
  39.     (i.e. empty string) is returned instead.  This lets the parser adhere to a
  40.     simple abstraction -- it parses until EOF closes the current message.
  41.     '''
  42.     
  43.     def __init__(self):
  44.         self._partial = ''
  45.         self._lines = []
  46.         self._eofstack = []
  47.         self._closed = False
  48.  
  49.     
  50.     def push_eof_matcher(self, pred):
  51.         self._eofstack.append(pred)
  52.  
  53.     
  54.     def pop_eof_matcher(self):
  55.         return self._eofstack.pop()
  56.  
  57.     
  58.     def close(self):
  59.         self._lines.append(self._partial)
  60.         self._partial = ''
  61.         self._closed = True
  62.  
  63.     
  64.     def readline(self):
  65.         if not self._lines:
  66.             if self._closed:
  67.                 return ''
  68.             
  69.             return NeedMoreData
  70.         
  71.         line = self._lines.pop()
  72.         for ateof in self._eofstack[::-1]:
  73.             if ateof(line):
  74.                 self._lines.append(line)
  75.                 return ''
  76.                 continue
  77.         
  78.         return line
  79.  
  80.     
  81.     def unreadline(self, line):
  82.         if not line is not NeedMoreData:
  83.             raise AssertionError
  84.         self._lines.append(line)
  85.  
  86.     
  87.     def push(self, data):
  88.         '''Push some new data into this object.'''
  89.         data = self._partial + data
  90.         self._partial = ''
  91.         parts = NLCRE_crack.split(data)
  92.         self._partial = parts.pop()
  93.         lines = []
  94.         for i in range(len(parts) // 2):
  95.             lines.append(parts[i * 2] + parts[i * 2 + 1])
  96.         
  97.         self.pushlines(lines)
  98.  
  99.     
  100.     def pushlines(self, lines):
  101.         self._lines[:0] = lines[::-1]
  102.  
  103.     
  104.     def is_closed(self):
  105.         return self._closed
  106.  
  107.     
  108.     def __iter__(self):
  109.         return self
  110.  
  111.     
  112.     def next(self):
  113.         line = self.readline()
  114.         if line == '':
  115.             raise StopIteration
  116.         
  117.         return line
  118.  
  119.  
  120.  
  121. class FeedParser:
  122.     '''A feed-style parser of email.'''
  123.     
  124.     def __init__(self, _factory = message.Message):
  125.         '''_factory is called with no arguments to create a new message obj'''
  126.         self._factory = _factory
  127.         self._input = BufferedSubFile()
  128.         self._msgstack = []
  129.         self._parse = self._parsegen().next
  130.         self._cur = None
  131.         self._last = None
  132.         self._headersonly = False
  133.  
  134.     
  135.     def _set_headersonly(self):
  136.         self._headersonly = True
  137.  
  138.     
  139.     def feed(self, data):
  140.         '''Push more data into the parser.'''
  141.         self._input.push(data)
  142.         self._call_parse()
  143.  
  144.     
  145.     def _call_parse(self):
  146.         
  147.         try:
  148.             self._parse()
  149.         except StopIteration:
  150.             pass
  151.  
  152.  
  153.     
  154.     def close(self):
  155.         '''Parse all remaining data and return the root message object.'''
  156.         self._input.close()
  157.         self._call_parse()
  158.         root = self._pop_message()
  159.         if not not (self._msgstack):
  160.             raise AssertionError
  161.         if root.get_content_maintype() == 'multipart' and not root.is_multipart():
  162.             root.defects.append(errors.MultipartInvariantViolationDefect())
  163.         
  164.         return root
  165.  
  166.     
  167.     def _new_message(self):
  168.         msg = self._factory()
  169.         if self._cur and self._cur.get_content_type() == 'multipart/digest':
  170.             msg.set_default_type('message/rfc822')
  171.         
  172.         if self._msgstack:
  173.             self._msgstack[-1].attach(msg)
  174.         
  175.         self._msgstack.append(msg)
  176.         self._cur = msg
  177.         self._last = msg
  178.  
  179.     
  180.     def _pop_message(self):
  181.         retval = self._msgstack.pop()
  182.         if self._msgstack:
  183.             self._cur = self._msgstack[-1]
  184.         else:
  185.             self._cur = None
  186.         return retval
  187.  
  188.     
  189.     def _parsegen(self):
  190.         self._new_message()
  191.         headers = []
  192.         for line in self._input:
  193.             if line is NeedMoreData:
  194.                 yield NeedMoreData
  195.                 continue
  196.             
  197.             if not headerRE.match(line):
  198.                 if not NLCRE.match(line):
  199.                     self._input.unreadline(line)
  200.                 
  201.                 break
  202.             
  203.             headers.append(line)
  204.         
  205.         self._parse_headers(headers)
  206.         if self._headersonly:
  207.             lines = []
  208.             while True:
  209.                 line = self._input.readline()
  210.                 if line is NeedMoreData:
  211.                     yield NeedMoreData
  212.                     continue
  213.                 
  214.                 if line == '':
  215.                     break
  216.                 
  217.                 lines.append(line)
  218.             self._cur.set_payload(EMPTYSTRING.join(lines))
  219.             return None
  220.         
  221.         if self._cur.get_content_type() == 'message/delivery-status':
  222.             while True:
  223.                 self._input.push_eof_matcher(NLCRE.match)
  224.                 for retval in self._parsegen():
  225.                     if retval is NeedMoreData:
  226.                         yield NeedMoreData
  227.                         continue
  228.                     
  229.                 
  230.                 msg = self._pop_message()
  231.                 self._input.pop_eof_matcher()
  232.                 while True:
  233.                     line = self._input.readline()
  234.                     if line is NeedMoreData:
  235.                         yield NeedMoreData
  236.                         continue
  237.                     
  238.                     break
  239.                 while True:
  240.                     line = self._input.readline()
  241.                     if line is NeedMoreData:
  242.                         yield NeedMoreData
  243.                         continue
  244.                     
  245.                     break
  246.                 if line == '':
  247.                     break
  248.                 
  249.                 self._input.unreadline(line)
  250.             return None
  251.         
  252.         if self._cur.get_content_maintype() == 'message':
  253.             for retval in self._parsegen():
  254.                 if retval is NeedMoreData:
  255.                     yield NeedMoreData
  256.                     continue
  257.                 
  258.             
  259.             self._pop_message()
  260.             return None
  261.         
  262.         if self._cur.get_content_maintype() == 'multipart':
  263.             boundary = self._cur.get_boundary()
  264.             if boundary is None:
  265.                 self._cur.defects.append(errors.NoBoundaryInMultipartDefect())
  266.                 lines = []
  267.                 for line in self._input:
  268.                     if line is NeedMoreData:
  269.                         yield NeedMoreData
  270.                         continue
  271.                     
  272.                     lines.append(line)
  273.                 
  274.                 self._cur.set_payload(EMPTYSTRING.join(lines))
  275.                 return None
  276.             
  277.             separator = '--' + boundary
  278.             boundaryre = re.compile('(?P<sep>' + re.escape(separator) + ')(?P<end>--)?(?P<ws>[ \\t]*)(?P<linesep>\\r\\n|\\r|\\n)?$')
  279.             capturing_preamble = True
  280.             preamble = []
  281.             linesep = False
  282.             while True:
  283.                 line = self._input.readline()
  284.                 if line is NeedMoreData:
  285.                     yield NeedMoreData
  286.                     continue
  287.                 
  288.                 if line == '':
  289.                     break
  290.                 
  291.                 mo = boundaryre.match(line)
  292.                 if mo:
  293.                     if mo.group('end'):
  294.                         linesep = mo.group('linesep')
  295.                         break
  296.                     
  297.                     if capturing_preamble:
  298.                         if preamble:
  299.                             lastline = preamble[-1]
  300.                             eolmo = NLCRE_eol.search(lastline)
  301.                             if eolmo:
  302.                                 preamble[-1] = lastline[:-len(eolmo.group(0))]
  303.                             
  304.                             self._cur.preamble = EMPTYSTRING.join(preamble)
  305.                         
  306.                         capturing_preamble = False
  307.                         self._input.unreadline(line)
  308.                         continue
  309.                     
  310.                     while True:
  311.                         line = self._input.readline()
  312.                         if line is NeedMoreData:
  313.                             yield NeedMoreData
  314.                             continue
  315.                         
  316.                         mo = boundaryre.match(line)
  317.                         if not mo:
  318.                             self._input.unreadline(line)
  319.                             break
  320.                             continue
  321.                     self._input.push_eof_matcher(boundaryre.match)
  322.                     for retval in self._parsegen():
  323.                         if retval is NeedMoreData:
  324.                             yield NeedMoreData
  325.                             continue
  326.                         
  327.                     
  328.                     if self._last.get_content_maintype() == 'multipart':
  329.                         epilogue = self._last.epilogue
  330.                         if epilogue == '':
  331.                             self._last.epilogue = None
  332.                         elif epilogue is not None:
  333.                             mo = NLCRE_eol.search(epilogue)
  334.                             if mo:
  335.                                 end = len(mo.group(0))
  336.                                 self._last.epilogue = epilogue[:-end]
  337.                             
  338.                         
  339.                     else:
  340.                         payload = self._last.get_payload()
  341.                         if isinstance(payload, basestring):
  342.                             mo = NLCRE_eol.search(payload)
  343.                             if mo:
  344.                                 payload = payload[:-len(mo.group(0))]
  345.                                 self._last.set_payload(payload)
  346.                             
  347.                         
  348.                     self._input.pop_eof_matcher()
  349.                     self._pop_message()
  350.                     self._last = self._cur
  351.                     continue
  352.                 if not capturing_preamble:
  353.                     raise AssertionError
  354.                 preamble.append(line)
  355.             if capturing_preamble:
  356.                 self._cur.defects.append(errors.StartBoundaryNotFoundDefect())
  357.                 self._cur.set_payload(EMPTYSTRING.join(preamble))
  358.                 epilogue = []
  359.                 for line in self._input:
  360.                     if line is NeedMoreData:
  361.                         yield NeedMoreData
  362.                         continue
  363.                         continue
  364.                 
  365.                 self._cur.epilogue = EMPTYSTRING.join(epilogue)
  366.                 return None
  367.             
  368.             if linesep:
  369.                 epilogue = [
  370.                     '']
  371.             else:
  372.                 epilogue = []
  373.             for line in self._input:
  374.                 if line is NeedMoreData:
  375.                     yield NeedMoreData
  376.                     continue
  377.                 
  378.                 epilogue.append(line)
  379.             
  380.             if epilogue:
  381.                 firstline = epilogue[0]
  382.                 bolmo = NLCRE_bol.match(firstline)
  383.                 if bolmo:
  384.                     epilogue[0] = firstline[len(bolmo.group(0)):]
  385.                 
  386.             
  387.             self._cur.epilogue = EMPTYSTRING.join(epilogue)
  388.             return None
  389.         
  390.         lines = []
  391.         for line in self._input:
  392.             if line is NeedMoreData:
  393.                 yield NeedMoreData
  394.                 continue
  395.             
  396.             lines.append(line)
  397.         
  398.         self._cur.set_payload(EMPTYSTRING.join(lines))
  399.  
  400.     
  401.     def _parse_headers(self, lines):
  402.         lastheader = ''
  403.         lastvalue = []
  404.         for lineno, line in enumerate(lines):
  405.             if line[0] in ' \t':
  406.                 if not lastheader:
  407.                     defect = errors.FirstHeaderLineIsContinuationDefect(line)
  408.                     self._cur.defects.append(defect)
  409.                     continue
  410.                 
  411.                 lastvalue.append(line)
  412.                 continue
  413.             
  414.             if lastheader:
  415.                 lhdr = EMPTYSTRING.join(lastvalue)[:-1].rstrip('\r\n')
  416.                 self._cur[lastheader] = lhdr
  417.                 lastheader = ''
  418.                 lastvalue = []
  419.             
  420.             if line.startswith('From '):
  421.                 if lineno == 0:
  422.                     mo = NLCRE_eol.search(line)
  423.                     if mo:
  424.                         line = line[:-len(mo.group(0))]
  425.                     
  426.                     self._cur.set_unixfrom(line)
  427.                     continue
  428.                 elif lineno == len(lines) - 1:
  429.                     self._input.unreadline(line)
  430.                     return None
  431.                 else:
  432.                     defect = errors.MisplacedEnvelopeHeaderDefect(line)
  433.                     self._cur.defects.append(defect)
  434.             
  435.             i = line.find(':')
  436.             if i < 0:
  437.                 defect = errors.MalformedHeaderDefect(line)
  438.                 self._cur.defects.append(defect)
  439.                 continue
  440.             
  441.             lastheader = line[:i]
  442.             lastvalue = [
  443.                 line[i + 1:].lstrip()]
  444.         
  445.         if lastheader:
  446.             self._cur[lastheader] = EMPTYSTRING.join(lastvalue).rstrip('\r\n')
  447.         
  448.  
  449.  
  450.